/***************************************************************************
 *
 * Copyright (C) 2001 International Business Machines
 * All rights reserved.
 *
 * This file is part of the GPFS mmfslinux kernel module.
 *
 * Redistribution and use in source and binary forms, with or without 
 * modification, are permitted provided that the following conditions 
 * are met:
 *
 *  1. Redistributions of source code must retain the above copyright notice, 
 *     this list of conditions and the following disclaimer. 
 *  2. Redistributions in binary form must reproduce the above copyright 
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution. 
 *  3. The name of the author may not be used to endorse or promote products 
 *     derived from this software without specific prior written
 *     permission. 
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *************************************************************************** */
/*
 * Inode operations
 *
 * Contents:
 *   printInode
 *   printDentry
 *   cxiSetOSNode
 *   cxiInvalidatePerm
 *   getIattr
 *   get_umask
 *   setCred
 *   gpfs_i_create
 *   gpfs_i_lookup
 *   gpfs_i_link
 *   gpfs_i_unlink
 *   gpfs_i_symlink
 *   gpfs_i_mkdir
 *   gpfs_i_rmdir
 *   gpfs_i_mknod
 *   gpfs_i_rename
 *   gpfs_i_readlink
 *   gpfs_i_follow_link
 *   gpfs_i_readpage
 *   gpfs_i_writepage
 *   gpfs_i_truncatepage
 *   gpfs_i_bmap
 *   gpfs_i_truncate
 *   gpfs_i_permission
 *   gpfs_i_smap
 *   gpfs_i_updatepage
 *   gpfs_i_revalidate
 *   gpfs_i_setattr
 *   gpfs_i_setattr_internal
 *   gpfs_i_getattr
 *   gpfs_i_getattr_internal
 *
 * $Id: inode.c,v 1.43 2001/10/09 17:45:31 dcraft Exp $
 *
 * $Log: inode.c,v $
 * Revision 1.43  2001/10/09 17:45:31  dcraft
 * Fixes for running on 2.4.9-ac kernel series. (behind ifdefs)
 *
 * Revision 1.42  2001/09/28 20:46:54  wyllie
 * Include more operations in vfsstats counters
 *
 * Revision 1.41  2001/09/20 05:57:16  wsawdon
 * Renamed va_xperm to va_xinfo and defined bit flags to allow var to be
 * shared between extended permissions and snapLatest entries.
 *
 * Revision 1.40  2001/09/12 05:45:51  schmuck
 * On ACL changes, permission information cached in the Linux inode
 * was not being updated correctly.
 *
 * Revision 1.39  2001/09/06 22:35:33  wyllie
 * Change revalidate trace from level 1 to level 2
 *
 * Revision 1.38  2001/09/04 15:51:21  eshel
 * Performance improvement to unlock the kernel lock on calls from NFSD.
 *
 * Revision 1.37  2001/08/10 22:51:39  tee
 * Avoid unnecessary cast.
 *
 * Revision 1.36  2001/08/10 22:33:03  tee
 * Declare functions with correct argument types to avoid unnecessary casting.
 * Cosmetic changes to make AIX and Linux mmap code more similar.
 *
 * Revision 1.35  2001/08/09 21:11:19  dcraft
 * Modifications to allow running on latest Redhat 7.1 update
 * Kernel version 2.4.3-12.
 * Requires checkout of new site.mcr.proto
 *
 * Revision 1.34  2001/08/06 23:37:59  wyllie
 * Add contents section.  Change trace of BKL.
 *
 * Revision 1.33  2001/08/04 00:42:26  tee
 * Remove LINUX_MMAP ifdefs
 *
 * Revision 1.32  2001/07/19 17:39:06  manoj
 * Allow readpage to service sendfile(). Not complete (need to invalidate Linux's
 * cached inode pages, either on return from readpage or on a wx/sx BRL request).
 *
 * Revision 1.31  2001/07/09 15:56:38  wyllie
 * Partial support for running on kernel version 2.4.6
 *
 * Revision 1.30  2001/06/19 17:45:37  eshel
 * Add gpfsSyncNFS to sync attributes for NFS setattr call.
 *
 * Revision 1.29  2001/06/15 18:00:23  wyllie
 * Fix a trace
 *
 * Revision 1.28  2001/05/30 20:41:28  wyllie
 * Trace inode numbers more consistently
 *
 * Revision 1.27  2001/05/18 02:11:28  tee
 * Fix stack overflow in gpfs_i_follow_link which was keeping a 4K data buffer
 * on the stack.  This routine can be entered recursively up to 8 times and
 * the stack is only 8K.  Even without recursion, the current task structure
 * (which lives below the stack) could be clobbered leading to various
 * problems such as seg faults or hangs.
 *
 * Revision 1.26  2001/05/17 01:22:52  tee
 * Fix seg fault in gpfs_i_follow_link due to trace statement using a pointer
 * that might not be initialized.
 *
 * Revision 1.25  2001/05/16 20:17:26  eshel
 * Define lookup operation only for directory files. LINUX determines if a file
 * is a directory or not based on the availability of this lookup operation in
 * the struct inode_operations.
 *
 * Revision 1.24  2001/05/12 18:38:26  schmuck
 * Fix problem with NFS returning spurious errors under load.
 *
 * Revision 1.23  2001/05/10 01:56:44  wsawdon
 * Fixed bugs in linux kernel code related to stat().
 * stat() must return external inode number for snapshots.
 * Also fixed bugs returning rdev for normal files
 * and size for special files.
 *
 * Revision 1.22  2001/05/04 23:30:12  schmuck
 * Move "extern struct xxx_operations ..." declarations into .h file
 * instead of replicating them in various .c files.
 * Replace empty gpfs_dops_valid table with a NULL pointer.
 *
 * Revision 1.21  2001/05/02 02:08:02  schmuck
 * Apply recent tortureDir test related fixes to gpfs_i_link:
 * Return from kSFSLink with an inode lock held; set the CO_VFS_REFERENCE
 * flag and instantiate the dcache entry before releasing the lock.
 *
 * Revision 1.20  2001/05/02 00:21:23  schmuck
 * Fix another problem found by tortureDir test on Linux:
 * On lookup and create, instantiate the dcache entry while holding the
 * inode lock, or, in case of a negative dcache entry, the directory lock.
 * This closes a window where a token revoke could clear the
 * CO_VFS_REFERENCE flag without invalidating the dcache entry.
 * It also eliminates the need for a d_revalidate operation.
 *
 * Revision 1.19  2001/04/25 20:22:10  eshel
 * Make sure we get the i_rdev set at the end of mknod call.
 *
 * Revision 1.18  2001/04/24 00:55:09  eshel
 * Set vector table for special files so gpfs will not get these operations.
 *
 * Revision 1.17  2001/04/23 23:08:31  dcraft
 * Fix disable_lock so it actually does what it says it does.
 * Perform FEXCL check before access check on create race condition.
 *
 * Revision 1.16  2001/04/23 21:09:38  radhak
 * Defect 337635:
 * When deleting an inode, the cxiFreeOSNode() is setting
 * inode->i_mapping->a_ops pointer to NULL causing segmentation in
 * truncate_list_inode_pages() while cleaning mmaped pages.
 *
 * Revision 1.15  2001/04/23 18:11:26  eshel
 * Rename createThreadId to createRaceLoserThreadId and fix logic error.
 *
 * Revision 1.14  2001/04/22 16:36:39  dcraft
 * Reimplement wait queue structure to have a chain of linux
 * wait queue heads and only one waiter per head.  This allows
 * us to control exactly which task will wake up.  Previously
 * the OS was free to select any task on the wait queue head.
 * This gave us incorrect semantics for "wakeup with result"
 * and resulted in crashes stating unexpected EINTR from wait.
 *
 * Revision 1.13  2001/04/20 23:03:07  eshel
 * If a regular file is created by the sys_open call (for now we can not tell if
 * the call is from sys_mknod or sys_open) and the file is found, return rc 0,
 * remember the thread that called create. Later on the open call the open flags
 * are available and if it is the same thread, and FEXCL was on fail it with
 * EEXIST, also check permission since linux assumes that this process created
 * the file and did not do any permission check.
 *
 * Revision 1.12  2001/04/11 20:05:26  dcraft
 * fix dentry instantiation race conditions since no multinode
 * serialization is held.  add d_revalidate capability for negative
 * name dentries.  d_delete calls are the responsibility of the kernel
 * (not gpfs).
 *
 * Revision 1.11  2001/04/09 21:06:00  eshel
 * Add code to keep OS node (linux inode) attributes up to date.
 *
 * Revision 1.10  2001/04/09 20:26:06  dcraft
 * fix dereference of null pointer on enoent lookup
 *
 * Revision 1.9  2001/04/08 22:18:29  dcraft
 * Fix multinode delete race conditions.  Still incomplete.
 *
 * Revision 1.8  2001/04/05 23:15:17  eshel
 * remove i_nlink changes which are now done by cxiUpdateInode() that is called
 * from ChangeNlink() in the daemon while holding lock.
 *
 * Revision 1.7  2001/04/05 13:31:11  gjertsen
 * Continue C++ to C conversion with manual C++2C utility.
 * Changes primarily for vfs stat stuff.
 *
 * Revision 1.75  2001/04/04 21:14:42  dcraft
 * Invalidate inode attributes (particularly i_nlink) when getVattr() can no longer
 * find inode.   Update attributes after rename over an existing file, so d_move
 * will correctly kill target dentry.   Add printing of dentries when "mmfsadm dump vnodes"
 * is executed.  Initial implementation of gpfs_d_revalidate.
 *
 * Revision 1.74  2001/03/26 18:29:57  dcraft
 * Update inode attributes in OS node layer via callback to cxiSetOSNode
 * (previous setInode).  The attributes are now updated during kSFSGetattr()
 * while the lock is held to ensure validity.
 *
 * Revision 1.73  2001/03/15 16:49:26  eshel
 * Call permission check only if got access mode.
 *
 * Revision 1.72  2001/03/05 23:28:11  dcraft
 * Modify inode and gpfsNode reference management.  Inode is now acquired
 * during gpfsNode creation and must be released via cxiPutOSNode().
 * (documented in gpfs database).  Add "mmfsadm dump vnodes" for producing
 * trace info on all held inodes.
 *
 * Revision 1.71  2001/03/03 03:19:16  dixonbp
 * On link/unlink, i_nlink should be updated and when gpfs_i_setattr
 * changes mode, i_mode needs to be updated.
 *
 * Revision 1.70  2001/02/23 17:34:55  dixonbp
 * Maintain the generation in the linux inode.
 *
 * Revision 1.69  2001/02/15 17:28:45  eshel
 * Use adjusted inode numbers for snapshot inodes.
 *
 * Revision 1.68  2001/02/08 18:03:40  schmuck
 * Tweak fast path through gpfs_i_permission for files that don't have
 * extended acls: instead of doing the check in gpfs_i_permission,
 * set inode_operations::permission to NULL so Linux will do the check
 * without invoking gpfs_i_permission.
 * No functional change.
 *
 * Revision 1.67  2001/01/31 17:32:32  dixonbp
 * Fix some comments.
 *
 * Revision 1.66  2001/01/27 15:42:21  dixonbp
 * NFS fixes to cxiCloseNFS and gpfs_f_lock.  Remove incorrect nfs handling
 * in gpfs_i_validate, and start to handle a nfs problem with gpfs_i_lookup.
 *
 * Revision 1.65  2001/01/27 00:36:28  eshel
 * add code to support gpfs_iwritedir()
 *
 * Revision 1.64  2001/01/24 16:41:30  schmuck
 * Remove unnecessary NB_PERF ifdefs.
 *
 * Revision 1.63  2001/01/19 20:55:12  radhak
 * LINUX_MMAP: linux 2.4.0 kernel rework.
 * For the time being, disabled single node mmap semantics because of the lack
 * of an interface to invalidate memory mapped pages
 *
 * Revision 1.62  2001/01/09 15:26:49  dixonbp
 * Handle case where NFS calls gpfs_i_revalidate even though the dentry
 * has been invalidated (and it should be calling gpfs_d_revalidate).
 *
 * Revision 1.61  2000/12/29 22:22:32  radhak
 * Defect 322452: Before calling gpfs_filemap_sync get lock.
 * Also added some traces.
 *
 * Revision 1.60  2000/12/19 21:11:57  wyllie
 * Remove assertions and traces about the state of the Linux BKL.  Linux does
 * not keep track of who owns the lock, so these asserts were blowing up on
 * an SMP if the kernel lock happened to be held by the other processor.
 *
 * Revision 1.59  2000/12/19 16:10:28  wyllie
 * Move Linux mmap declarations out of platform-independent code
 *
 * Revision 1.58  2000/12/18 13:53:16  gjertsen
 * More cleanup of comments/documentation.
 *
 * Revision 1.57  2000/12/15 13:56:42  gjertsen
 * Clean up documentation.
 *
 * Revision 1.56  2000/12/09 20:33:36  schmuck
 * Instead of setting/checking flags to invalidate/revalidate dcache entries,
 * simply set dentry::d_ops to point to different dentry_operations tables:
 * one where the d_revalidate field is NULL (means the dentry is valid),
 * and one where d_revalidate points at a function that always returns false
 * (means the dentry is invalid).
 *
 * Revision 1.55  2000/12/08 22:16:04  schmuck
 * Add a call to invalidate negative dcache entries when a directory
 * byte-range token is relinquished.  This allows gpfs_d_revalidate
 * to return the correct answer for negative dcache entries, avoiding
 * an extra call to lookup.
 *
 * Revision 1.54  2000/12/08 02:43:09  schmuck
 * The d_revalidate operation is supposed to check whether the cached
 * directory entry is still valid, not whether the file it refers to
 * still exists.
 *
 * Revision 1.53  2000/12/07 22:16:31  schmuck
 * setCred: use current->fsuid, fsgid instead of current->uid, gid,
 * as other Linux file systems do.
 * Fix bad parameter in trace.
 *
 * Revision 1.52  2000/12/07 21:38:24  schmuck
 * Add a call to invalidate stat information cached in the Linux inode when
 * the inode token is relinquished/downgraded, so that gpfs_i_revalidate
 * optimization works correctly with multiple nodes.
 * Add similar optimization for gpfs_i_permission.
 * Remove NB_PERF ifdef from this code.
 *
 * Revision 1.51  2000/12/04 19:04:13  schmuck
 * Re-do changes from Revision 1.47 that got lost in 1.48.
 *
 * Revision 1.50  2000/12/04 18:20:35  eshel
 * Zero privVfsP pointer in the super block only after unmount is complete and
 * remove some check for null pointer.
 *
 * Revision 1.49  2000/12/04 17:49:54  wyllie
 * Do not allow 'read' of a directory, only 'readdir'.  Otherwise the prefetch
 * code could build a buffer descriptor for a hole in a directory, and the read
 * would populate the buffer with binary zeros.  A later fast lookup would find
 * a directory block with an invalid format and barf.
 *
 * Revision 1.48  2000/12/03 01:48:07  radhak
 * LINUX_MMAP: mmap flush
 *
 * Revision 1.47  2000/12/01 02:10:59  schmuck
 * Instead of assigning NULL function pointers when initializing or resetting
 * the gpfs_operations table, have it point to a dummy function that returns
 * ENOSYS.  This avoids having to check for NULL before each call.
 *
 * Revision 1.46  2000/11/16 01:01:04  wyllie
 * Split definition of ext_cred_t and the ACL helper routines that examine
 * it into platform-dependent versions, and simplify the Linux versions.
 * Give ACL helper routines cxi prefixes.
 *
 * Revision 1.45  2000/11/14 17:10:47  wyllie
 * Make it clearer that gpfsAccess is always called with who==ACC_SELF
 *
 * Revision 1.44  2000/11/08 02:43:31  radhak
 * Linux mmap: remove CONFIG_HIGHMEM linux kernel build checking.
 *
 * Revision 1.43  2000/11/08 01:10:19  radhak
 * More linux mmap code
 *
 * Revision 1.42  2000/11/06 19:56:13  gjertsen
 * Linux code cleanup and put in build safeguards.
 *
 * Revision 1.41  2000/11/03 20:27:00  dcraft
 * Build SMP, UP, NOHIGHMEM, and 4G memory variations of mmfslinux in
 * one pass.   Names are mmfslinux_UP1G, mmfslinux_UP4G, mmfslinux_SMP1G,
 * mmfslinux_SMP4G.
 *
 * Revision 1.40  2000/11/02 19:46:25  gjertsen
 * Linux code split. Pull out NBD stuff.
 *
 *
 */

#include <Shark-gpl.h>

#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/smp_lock.h>
#include <linux/mm.h>
#include <linux/highmem.h>

#include <cxiMode.h>
#include <cxiSystem.h>
#include <cxi2gpfs.h>
#include <cxiVFSStats.h>
#include <cxiCred.h>

#include <linux2gpfs.h>
#include <Trace.h>
#include <cxiMmap.h>

#ifdef MODULE
#include <linux/module.h>
#endif /* MODULE */


/* Debug helper: emit the fields of a Linux inode as five trace records
   (all issued with TRACE_VNODE and level argument 3).  The inode is only
   read, never modified. */
void
printInode(struct inode *iP)
{
  /* Record 1: identity - address, inode number (decimal and hex),
     reference count, device, mode and link count. */
  TRACE7(TRACE_VNODE, 3, TRCID_PRINTINODE_1,
         "printInode: iP 0x%lX i_ino %d (0x%X) count %d dev 0x%X mode 0x%X nlink %d\n",
         iP, iP->i_ino, iP->i_ino, atomic_read((atomic_t *)&iP->i_count),
         iP->i_dev, iP->i_mode, iP->i_nlink);
  /* Record 2: ownership, rdev, size and access/modify times. */
  TRACE6(TRACE_VNODE, 3, TRCID_PRINTINODE_2,
         "printInode: uid %d gid %d rdev 0x%X size %lld atime 0x%X "
         "mtime 0x%X\n", iP->i_uid, iP->i_gid, iP->i_rdev, iP->i_size,
         iP->i_atime, iP->i_mtime);

  /* Record 3: change time, block size/count, version and the
     inode_operations table pointer. */
  TRACE5(TRACE_VNODE, 3, TRCID_PRINTINODE_3,
         "printInode: ctime 0x%X blksize 0x%X blocks %d ver 0x%X op 0x%lX\n",
         iP->i_ctime, iP->i_blksize, iP->i_blocks, iP->i_version,
         iP->i_op);

  /* Record 4: file_operations table, super block, attribute flags,
     generation and the file-system private pointer (u.generic_ip). */
  TRACE5(TRACE_VNODE, 3, TRCID_PRINTINODE_4,
         "printInode: fop 0x%lX sb 0x%lX flags 0x%X gen %d generic 0x%lX\n",
         iP->i_fop, iP->i_sb, iP->i_attr_flags, iP->i_generation,
         iP->u.generic_ip);

  /* Record 5: linkage of the i_hash and i_list list heads
     (address of the head followed by its next/prev pointers). */
  TRACE6(TRACE_VNODE, 3, TRCID_PRINTINODE_5,
         "printInode: hash 0x%lX next 0x%lX prev 0x%lX list 0x%lX "
         "next 0x%lX prev 0x%lX\n",
         &(iP->i_hash), iP->i_hash.next, iP->i_hash.prev,
         &(iP->i_list), iP->i_list.next, iP->i_list.prev);
}

/* Debug helper: emit the fields of a Linux dentry as a series of trace
   records (all issued with TRACE_VNODE and level argument 3).  Which
   fields are traced depends on LINUX_KERNEL_VERSION, because d_vfsmnt and
   d_reftime are only referenced for kernels older than 2.4.6 and
   d_vfs_flags only for 2.4.6 and later.  The dentry is only read. */
void
printDentry(struct dentry *dP)
{
  /* Address, reference count, flags and the inode the dentry points to
     (NULL for a negative dentry). */
  TRACE4(TRACE_VNODE, 3, TRCID_PRINTDENTRY_1,
         "printDentry: dentry 0x%lX count %d flags 0x%X inode 0x%lX\n",
         dP, atomic_read((atomic_t *)&dP->d_count), dP->d_flags, dP->d_inode);

  /* Parent and hash chain; older kernels additionally trace d_vfsmnt. */
# if LINUX_KERNEL_VERSION >= 2040600
    TRACE2(TRACE_VNODE, 3, TRCID_PRINTDENTRY_2a,
           "printDentry: parent 0x%lX hash 0x%lX\n",
           dP->d_parent, dP->d_hash.next);
# else
    TRACE3(TRACE_VNODE, 3, TRCID_PRINTDENTRY_2,
           "printDentry: parent 0x%lX vfsmnt 0x%lX hash 0x%lX\n",
           dP->d_parent, dP->d_vfsmnt.next, dP->d_hash.next);
# endif

  /* First element of each list the dentry participates in. */
  TRACE4(TRACE_VNODE, 3, TRCID_PRINTDENTRY_3,
         "printDentry: lru 0x%lX child 0x%lX subdirs 0x%lX alias 0x%lX\n",
         dP->d_lru.next, dP->d_child.next, dP->d_subdirs.next, dP->d_alias.next);

  /* Time stamp, dentry_operations table, super block and name pointer;
     the fourth value is d_vfs_flags on 2.4.6+ and d_reftime before that. */
# if LINUX_KERNEL_VERSION >= 2040600
    TRACE5(TRACE_VNODE, 3, TRCID_PRINTDENTRY_4a,
           "printDentry: time 0x%X op 0x%lX sb 0x%lX d_vfs_flags 0x%X name 0x%lX\n",
           dP->d_time, dP->d_op, dP->d_sb, dP->d_vfs_flags, dP->d_name.name);
# else
    TRACE5(TRACE_VNODE, 3, TRCID_PRINTDENTRY_4,
           "printDentry: time 0x%X op 0x%lX sb 0x%lX reftime 0x%X name 0x%lX\n",
           dP->d_time, dP->d_op, dP->d_sb, dP->d_reftime, dP->d_name.name);
# endif

  /* File-system private data and the inline name buffer (d_iname). */
  TRACE2(TRACE_VNODE, 3, TRCID_PRINTDENTRY_5,
         "printDentry: fsdata 0x%X iname '%s'\n", dP->d_fsdata, dP->d_iname);
}

/* Bring the Linux inode attached to a cxiNode up to date with a set of
 * attributes.
 *
 *   osVfsP - super block the inode is expected to belong to (asserted)
 *   cnP    - cxiNode whose osNodeP is the inode to update
 *   attrP  - attribute values to copy into the inode
 *
 * In addition to copying the attributes this routine
 *   - records va_xinfo in the cxiNode and marks every cached attribute
 *     class as valid (CXI_IC_ALL),
 *   - selects the inode and file operation tables from the file type and
 *     from whether VA_XPERM is set in va_xinfo,
 *   - points the inode's address space (if any) at gpfs_aops.
 */
void
cxiSetOSNode(void *osVfsP, cxiNode_t *cnP, cxiVattr_t *attrP)
{
  struct inode *inodeP = (struct inode *)cnP->osNodeP;
  struct super_block *sbP = (struct super_block *)osVfsP;
  int extPerm = (attrP->va_xinfo & VA_XPERM) != 0;

  DBGASSERT(inodeP != NULL);
  DBGASSERT(inodeP->u.generic_ip == cnP);
  DBGASSERT(inodeP->i_sb == sbP);

  /* Type, link count and ownership */
  inodeP->i_mode = attrP->va_mode;
  inodeP->i_nlink = attrP->va_nlink;
  inodeP->i_uid  = attrP->va_uid;
  inodeP->i_gid  = attrP->va_gid;
  inodeP->i_rdev = cxiDevToKernelDev(attrP->va_rdev);

  /* Size and time stamps (seconds only) */
  inodeP->i_size = attrP->va_size;
  inodeP->i_atime = attrP->va_atime.tv_sec;
  inodeP->i_mtime = attrP->va_mtime.tv_sec;
  inodeP->i_ctime = attrP->va_ctime.tv_sec;

  /* Allocation information, generation and flags */
  inodeP->i_blksize = attrP->va_blocksize;
  inodeP->i_blocks = attrP->va_blocks;
  inodeP->i_generation = attrP->va_gen;
  inodeP->i_flags = 0;

  /* Everything cached for this node is valid again */
  cnP->xinfo = attrP->va_xinfo;
  cnP->icValid = CXI_IC_ALL;

  /* Choose the operation tables.  Regular files and directories use the
     "_xperm" variant when extended permissions are present and the
     "_stdperm" variant otherwise. */
  switch (inodeP->i_mode & S_IFMT)
  {
    case S_IFREG:
      inodeP->i_op = extPerm ? &gpfs_iops_xperm : &gpfs_iops_stdperm;
      inodeP->i_fop = &gpfs_fops;
      break;

    case S_IFDIR:
      inodeP->i_op = extPerm ? &gpfs_dir_iops_xperm : &gpfs_dir_iops_stdperm;
      inodeP->i_fop = &gpfs_dir_fops;
      break;

    case S_IFLNK:
      inodeP->i_op = &gpfs_link_iops;
      inodeP->i_fop = &gpfs_fops;
      break;

    case S_IFBLK:
    case S_IFCHR:
    case S_IFIFO:
    case S_IFSOCK:
      /* Special files get the kernel's own vector tables, so gpfs will not
         see operations on them.
         NOTE(review): i_rdev was already passed through cxiDevToKernelDev
         above and is converted a second time here - confirm that this is
         intended. */
      init_special_inode(inodeP, inodeP->i_mode,
                         cxiDevToKernelDev(inodeP->i_rdev));
      break;
  }

  if (inodeP->i_mapping != NULL)
    inodeP->i_mapping->a_ops = &gpfs_aops;

  TRACE7(TRACE_VNODE, 2, TRCID_LINUXOPS_SETINODE,
         "cxiSetOSNode: inodeP 0x%lX i_ino 0x%X i_count %d i_mode 0x%X "
         "i_xinfo 0x%X i_nlink %d i_size %lld\n",
         inodeP, inodeP->i_ino, atomic_read((atomic_t *)&inodeP->i_count),
         inodeP->i_mode, attrP->va_xinfo, inodeP->i_nlink, inodeP->i_size);
}


/* Called from cxiInvalidateAttr when the CXI_IC_PERM option was given,
   i.e. when the permission related attributes cached in the struct inode
   (owner, mode, etc.) can no longer be trusted.

   The inode operation table is switched to the gpfs_..._xperm variant; the
   next permission check will then go through our gpfs_i_permission
   function, which will revalidate the permission attributes and set the
   table back to gpfs_..._stdperm, if appropriate.  File types other than
   regular file, symbolic link and directory are left alone. */
void
cxiInvalidatePerm(cxiNode_t *cnP)
{
  struct inode *inodeP = (struct inode *)cnP->osNodeP;

  TRACE3(TRACE_VNODE, 2, TRCID_CXIINVA_PERM,
         "cxiInvalidatePerm: cnP 0x%lX std %d dir std %d",
         cnP, inodeP->i_op == &gpfs_iops_stdperm,
         inodeP->i_op == &gpfs_dir_iops_stdperm);

  switch (inodeP->i_mode & S_IFMT)
  {
    case S_IFREG:
    case S_IFLNK:
      /* NOTE(review): cxiSetOSNode gives symbolic links gpfs_link_iops;
         here they are switched to gpfs_iops_xperm - confirm that table
         still provides the link operations. */
      inodeP->i_op = &gpfs_iops_xperm;
      break;

    case S_IFDIR:
      inodeP->i_op = &gpfs_dir_iops_xperm;
      break;

    default:
      /* nothing to invalidate for other file types */
      break;
  }
}


/* Fill an iattr structure from the attributes cached in a Linux inode.
 *
 *   inodeP - inode to read from
 *   attrP  - iattr to fill in
 *
 * Only mode, uid, gid, size and the three time stamps are copied;
 * ia_valid and ia_attr_flags are not set here.
 */
void
getIattr(struct inode *inodeP, struct iattr *attrP)
{
  /* time stamps */
  attrP->ia_atime = inodeP->i_atime;
  attrP->ia_mtime = inodeP->i_mtime;
  attrP->ia_ctime = inodeP->i_ctime;

  /* size */
  attrP->ia_size = inodeP->i_size;

  /* type/permission bits and ownership */
  attrP->ia_mode = inodeP->i_mode;
  attrP->ia_uid = inodeP->i_uid;
  attrP->ia_gid = inodeP->i_gid;
}

/* Return the file creation mask of the calling task, read from
   current->fs->umask.  Declared with an explicit (void) parameter list so
   that the definition is a proper prototype. */
static inline int
get_umask(void)
{
  return current->fs->umask;
}


/* Capture the credentials of the calling thread.
 *
 *   credP - credential structure to fill in
 *
 * The file-system uid/gid of the current task (fsuid, fsgid) become the
 * principal and primary group.  At most ECRED_NGROUPS of the task's
 * supplementary groups are copied into eGroups; num_groups is set to the
 * number actually copied.
 */
void
setCred(ext_cred_t *credP)
{
  int groupCount = MIN(current->ngroups, ECRED_NGROUPS);

  credP->principal = current->fsuid; /* user id */
  credP->group = current->fsgid;     /* primary group id */
  credP->num_groups = groupCount;

  /* Nothing to copy when the task has no supplementary groups */
  if (groupCount <= 0)
    return;

  memcpy(credP->eGroups, current->groups, groupCount * sizeof(gid_t));
}

/* inode_operations */

/* Create a file in a directory.  Called with a negative (no inode) dir
 * cache entry.
 *
 *   diP     - inode of the directory in which to create the file
 *   dentryP - negative dentry carrying the name of the new file
 *   mode    - file type and permission bits requested by the caller
 *
 * Returns 0 on success, otherwise the negated error code (-rc).  On
 * failure the dentry is unhashed with d_drop().
 *
 * This routine does not call d_instantiate() itself; dentryP is handed to
 * gpfsCreate/gpfsLookup, which presumably instantiate it - confirm there.
 *
 * If gpfsCreate reports EEXIST for what looks like a regular-file create
 * issued on behalf of open(), the existing file is looked up instead and
 * the calling thread is remembered in createRaceLoserThreadId so that the
 * FEXCL flag can be checked later (see the comment in the body).
 */
int
gpfs_i_create(struct inode *diP, struct dentry *dentryP, int mode)
{
  int rc;
  struct gpfsVfsData_t *privVfsP;
  cxiNode_t *dcnP;
  cxiNode_t *cnP = NULL;
  cxiIno_t iNum = (cxiIno_t)-1;
  struct inode *newInodeP = NULL;
  int flags = FWRITE | FCREAT | FEXCL;
  cxiMode_t umask = get_umask();
  ext_cred_t eCred;

  VFS_STAT_START(createCall);
  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_CREATE_ENTER,
         "gpfs_i_create enter: iP 0x%lX dentryP 0x%lX mode 0x%X name '%s'\n",
         diP, dentryP, mode, dentryP->d_name.name);
  /* BKL is held at entry */

  dcnP = VP_TO_CNP(diP);
  privVfsP = VP_TO_PVP(diP);
  DBGASSERT(privVfsP != NULL);

  setCred(&eCred);

retry:

  rc = gpfs_ops.gpfsCreate(privVfsP, dcnP, (void **)&newInodeP, &cnP, &iNum, 0,
                           flags, dentryP, (char *)dentryP->d_name.name,
                           mode, umask, NULL, &eCred);
  if (rc == 0)
  {
    DBGASSERT(cnP != NULL);
    DBGASSERT(iNum != -1);
    DBGASSERT(newInodeP != NULL);
    DBGASSERT(newInodeP->u.generic_ip == cnP);
    DBGASSERT(cnP->osNodeP == (void *)newInodeP);
    /* We really created the file: no create race was lost. */
    cnP->createRaceLoserThreadId = 0;
  }

  /* linux would normally serialize the creates on a directory (via the 
   * parent directory semaphore) to ensure that a create didn't fail with 
   * EEXIST.  However in a multinode environment we may perform a lookup 
   * on one node (thinking the file doesn't exist) yet a create is 
   * performed on a different node before linux can call the physical
   * file systems create.  We attempt to reconcile this case by marking
   * the fact that this happened and checking the FEXCL flag at gpfs_f_open()
   * to see if we should have failed this with EEXIST.
   */
  if (rc == EEXIST)
  {
    /* Make sure that this create call is part of the linux open call.  NFS
       and mknod calls create without an open, so check that this is not one
       of those calls. On the open call the open flags are available and if
       the FEXCL was on fail it with EEXIST. */
    int mode1;

    /* Skip if NFS create call. */
    if (cxiIsNFSThread())
      goto retExist;

    /* ??? if (sys_mknod call) goto xerror; */

    /* Do it only if trying to create a regular file.  The file type must be
       compared as a whole: S_IFREG shares its bit with S_IFLNK and S_IFSOCK,
       so a plain bit test would let those types through as well.  A mode
       without any type bits is treated as a regular file. */
    if (((mode & S_IFMT) != 0) && ((mode & S_IFMT) != S_IFREG))
      goto retExist;

    rc = gpfs_ops.gpfsLookup(privVfsP, (void *)diP, dcnP,
                             dentryP, (char *)dentryP->d_name.name,
                             (void **)&newInodeP, &cnP, &iNum, NULL,
                             &mode1, &eCred);
    /* The file vanished again between the create and the lookup: start
       over with the create. */
    if (rc == ENOENT)
      goto retry;
    if (!rc)
    {
      /* If the file that was found was a directory then return the
         return code that linux would have returned. */
      if (S_ISDIR(newInodeP->i_mode))
      {
        rc = EISDIR;
        goto retExist;
      }
      cnP->createRaceLoserThreadId = cxiGetThreadId();
    }
  }

retExist:
  if (rc)
  {
    d_drop(dentryP);
    goto xerror;
  }
#if LINUX_KERNEL_VERSION >= 2040900
  diP->i_sb->s_dirt = 1;
#endif

xerror:
  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_CREATE_EXIT,
         "gpfs_i_create exit: new inode 0x%lX iNum %d (0x%X) rc %d\n",
         newInodeP, iNum, iNum, rc);

  VFS_STAT_STOP;
  return -rc;
}

/* Look up a name in a directory.
 *
 *   diP     - inode of the directory to search
 *   dentryP - dentry carrying the name to look up
 *
 * Returns NULL when the lookup succeeded or the name does not exist
 * (ENOENT), and ERR_PTR(-rc) for any other failure, in which case the
 * caller in the Linux file system layer will dput() the dentry.
 *
 * This routine does not add the dentry to the hash list itself; dentryP is
 * passed to gpfsLookup, which presumably takes care of that - confirm
 * there.
 */
struct dentry *
gpfs_i_lookup(struct inode *diP, struct dentry *dentryP)
{
  struct gpfsVfsData_t *privVfsP;
  struct inode *newInodeP = NULL;
  struct dentry *resultP = NULL;
  cxiNode_t *dirNodeP;
  cxiNode_t *cnP = NULL;
  cxiIno_t iNum = (cxiIno_t)-1;
  cxiMode_t fileMode = 0;
  ext_cred_t cred;
  int exitCode = 0;   /* identifies the failing step in the exit trace */
  int rc = 0;

  VFS_STAT_START(lookupCall);
  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_LOOKUP_ENTER,
         "gpfs_i_lookup enter: diP 0x%lX dentryP 0x%lX name '%s'\n",
         diP, dentryP, dentryP->d_name.name);
  /* BKL is held at entry */

  dirNodeP = VP_TO_CNP(diP);
  privVfsP = VP_TO_PVP(diP);
  DBGASSERT(privVfsP != NULL);

  setCred(&cred);

  if (dirNodeP == NULL)
  {
    /* This can happen due to a bug in linux/fs/dcache.c (prune_dcache)
       where "count" entries are to be pruned, but the last one is
       found to be recently referenced.  When this happens, count is
       decremented, but the loop is not terminated.  The result is that
       it continues to prune entries past where it should (prunes
       everything).  If our patch for this is not applied, the result
       is a kernel failure as the cxiNode is referenced.  Checking
       here (and revalidate) allows us to reject the call instead. */

    TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_LOOKUP_STALE,
           "cxiNode for inode 0x%lX (ino 0x%X) was FREED!\n",
           diP, diP->i_ino);

    PRINTINODE(diP);
    rc = ESTALE;
    exitCode = 1;
    resultP = (struct dentry *)ERR_PTR(-rc);
  }
  else
  {
    rc = gpfs_ops.gpfsLookup(privVfsP, (void *)diP, dirNodeP,
                             dentryP, (char *)dentryP->d_name.name,
                             (void **)&newInodeP, &cnP, &iNum, NULL,
                             &fileMode, &cred);

    if (rc != 0 && rc != ENOENT)
    {
      /* internal failure */
      exitCode = 2;
      resultP = (struct dentry *)ERR_PTR(-rc);
    }
    else
    {
      if (rc == 0)
      {
        DBGASSERT(cnP != NULL);
        DBGASSERT(iNum != -1);
        DBGASSERT(newInodeP != NULL);
        DBGASSERT(newInodeP->u.generic_ip == cnP);
        DBGASSERT(cnP->osNodeP == (void *)newInodeP);
      }

      /* Found, or known not to exist: dump the resulting dentry */
      PRINTDENTRY(dentryP);
    }
  }

  TRACE7(TRACE_VNODE, 1, TRCID_LINUXOPS_LOOKUP_EXIT,
         "gpfs_i_lookup: new inode 0x%lX iNum %d (0x%X) cnP 0x%lX retP 0x%lX "
         "code %d rc %d\n", newInodeP, iNum, iNum, cnP, resultP, exitCode, rc);

  VFS_STAT_STOP;
  return resultP;
}

/* Create a hard link.
 *
 *   oldDentryP - dentry of the existing file to link to
 *   diP        - inode of the directory that receives the new name
 *   dentryP    - dentry carrying the new name
 *
 * Returns 0 on success, otherwise the negated error code (-rc).  On
 * failure the new dentry is unhashed with d_drop().
 */
int
gpfs_i_link(struct dentry *oldDentryP, struct inode *diP,
            struct dentry *dentryP)
{
  int rc = 0;
  struct inode *iP = oldDentryP->d_inode;
  cxiNode_t *dcnP;
  cxiNode_t *cnP;
  struct gpfsVfsData_t *privVfsP;
  ext_cred_t eCred;

  VFS_STAT_START(linkCall);
  /* Note: the trace labels both dentries "dentryP"; the first value is
     oldDentryP, the second the new dentryP. */
  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_LINK_ENTER,
         "gpfs_i_link enter: diP 0x%lX dentryP 0x%lX "
         "dentryP 0x%lX name '%s'\n", diP, oldDentryP, dentryP,
         dentryP->d_name.name);
  /* BKL is held at entry */

  cnP = VP_TO_CNP(iP);
  dcnP = VP_TO_CNP(diP);
  privVfsP = VP_TO_PVP(diP);
  DBGASSERT(privVfsP != NULL);

  setCred(&eCred);
  rc = gpfs_ops.gpfsLink(privVfsP, cnP, dcnP,
                         dentryP, (char *)dentryP->d_name.name, &eCred);
  if (rc)
  {
    d_drop(dentryP);
    goto xerror;
  }
#if LINUX_KERNEL_VERSION >= 2040900
  /* Flag the super block as dirty after a successful link */
  iP->i_sb->s_dirt = 1;
#endif

xerror:
  PRINTINODE(iP);
  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_LINK_EXIT,
         "gpfs_i_link exit: diP 0x%lX iP 0x%lX rc %d\n", diP, iP, rc);

  VFS_STAT_STOP;
  return -rc;
}

/* Remove a name from a directory.
 *
 *   diP     - inode of the directory containing the name
 *   dentryP - dentry of the file to remove
 *
 * Returns 0 on success, otherwise the negated error code (-rc).  On
 * failure the dentry is unhashed with d_drop(); on success d_delete() is
 * left to the VFS layer.
 */
int
gpfs_i_unlink(struct inode *diP, struct dentry *dentryP)
{
  struct inode *iP = dentryP->d_inode;
  struct gpfsVfsData_t *privVfsP;
  cxiNode_t *cnP;
  cxiNode_t *dcnP;
  ext_cred_t cred;
  int rc = 0;

  VFS_STAT_START(removeCall);
  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_UNLINK_ENTER,
         "gpfs_i_unlink enter: diP 0x%lX iP 0x%lX dentryP 0x%lX name '%s'\n",
         diP, iP, dentryP, dentryP->d_name.name);
  /* BKL is held at entry */

  cnP = VP_TO_CNP(iP);

  dcnP = VP_TO_CNP(diP);
  privVfsP = VP_TO_PVP(diP);
  DBGASSERT(privVfsP != NULL);

  /* Dcache handling: once we return, the VFS layer calls d_delete(), which
     turns the dentry into a valid, negative dcache entry.  If another node
     later creates a file with the same name, the BR token revoke for the
     directory block invalidates that negative entry.  Between gpfsRemove()
     and d_delete(), however, a BR token revoke would not realize that the
     entry needs to be invalidated, because it does not look negative yet.
     To close that window the dentry is marked "valid with d_delete
     pending", meaning "still valid, but a BR token revoke should mark it
     invalid even though it does not (yet) look like a negative entry".
     Ideally this would be done inside gpfsRemove while the BR lock on the
     directory is held.  Doing it here is acceptable because (1) local
     synchronization in the VFS (our caller holds the i_sem semaphore on
     the directory) prevents other threads from doing a lookup or create
     that might change the state back to plain "valid" before gpfsRemove
     has happened, and (2) a BR revoke arriving before gpfsRemove may mark
     the dentry invalid unnecessarily, which is sub-optimal but harmless. */
  dentryP->d_op = &gpfs_dops_ddeletepending;

  setCred(&cred);
  rc = gpfs_ops.gpfsRemove(privVfsP, cnP, dcnP, (char *)dentryP->d_name.name,
                           &cred);
  if (rc)
  {
    d_drop(dentryP);
  }
  else
  {
#if LINUX_KERNEL_VERSION >= 2040900
    diP->i_sb->s_dirt = 1;
#endif

    /* d_delete will be called at VFS layer  */
  }

  PRINTINODE(iP);
  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_UNLINK_EXIT,
         "gpfs_i_unlink exit: diP 0x%lX iP 0x%lX rc %d\n", diP, iP, rc);

  VFS_STAT_STOP;
  return -rc;
}

/*
 * gpfs_i_symlink - create a symbolic link in a directory.
 *
 *   diP             inode of the directory that receives the new entry
 *   dentryP         dentry carrying the name of the new link
 *   symlinkTargetP  text the link will contain
 *
 * The work is delegated to gpfs_ops.gpfsSymlink.  If that call fails the
 * dentry is unhashed via d_drop(); if it succeeds the superblock is flagged
 * dirty (when LINUX_KERNEL_VERSION >= 2040900).  Returns the negated rc of
 * gpfsSymlink, i.e. 0 on success.
 */
int
gpfs_i_symlink(struct inode *diP, struct dentry *dentryP,
               const char *symlinkTargetP)
{
  struct gpfsVfsData_t *privVfsP;
  struct inode *newInodeP = NULL;
  cxiIno_t iNum = (cxiIno_t)-1;
  cxiNode_t *dcnP;
  cxiNode_t *cnP;
  ext_cred_t eCred;
  int rc = 0;

  VFS_STAT_START(symlinkCall);
  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_SYMLINK1,
         "gpfs_i_symlink enter: iP 0x%lX dentryP 0x%lX symlinkTargetP '%s'\n",
         diP, dentryP, symlinkTargetP);
  TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_SYMLINK2,
         "gpfs_i_symlink enter: newLinkName '%s'\n", dentryP->d_name.name);
  /* BKL is held at entry */

  dcnP = VP_TO_CNP(diP);
  privVfsP = VP_TO_PVP(diP);
  DBGASSERT(privVfsP != NULL);

  setCred(&eCred);
  rc = gpfs_ops.gpfsSymlink(privVfsP, dcnP, (void **)&newInodeP, &cnP,
                            &iNum, dentryP, (char *)dentryP->d_name.name,
                            (char *)symlinkTargetP, &eCred);
  if (rc != 0)
  {
    /* Creation failed: unhash the dentry */
    d_drop(dentryP);
  }
  else
  {
    /* Sanity-check what gpfsSymlink handed back */
    DBGASSERT(cnP != NULL);
    DBGASSERT(iNum != -1);
    DBGASSERT(newInodeP != NULL);
    DBGASSERT(newInodeP->u.generic_ip == cnP);
    DBGASSERT(cnP->osNodeP == (void *)newInodeP);

#if LINUX_KERNEL_VERSION >= 2040900
    diP->i_sb->s_dirt = 1;
#endif
  }

  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_SYMLINK_EXIT,
         "gpfs_i_symlink exit: new inode 0x%lX iNum %d (0x%X) rc %d\n",
         newInodeP, iNum, iNum, rc);

  VFS_STAT_STOP;
  return -rc;
}

/*
 * gpfs_i_mkdir - create a directory.
 *
 *   diP      inode of the parent directory
 *   dentryP  dentry carrying the name of the new directory
 *   mode     requested mode bits
 *
 * The caller's umask is fetched with get_umask() and passed, together with
 * mode, to gpfs_ops.gpfsMkdir.  On failure the dentry is unhashed via
 * d_drop(); on success the superblock is flagged dirty (when
 * LINUX_KERNEL_VERSION >= 2040900).  Returns the negated rc of gpfsMkdir.
 */
int
gpfs_i_mkdir(struct inode *diP, struct dentry *dentryP, int mode)
{
  struct gpfsVfsData_t *privVfsP;
  struct inode *newInodeP = NULL;
  cxiIno_t iNum = (cxiIno_t)-1;
  cxiNode_t *dcnP;
  cxiNode_t *cnP;
  cxiMode_t umask;
  ext_cred_t eCred;
  int rc = 0;

  VFS_STAT_START(mkdirCall);
  umask = get_umask();  /* LFS should not apply umask and we may not */

  dcnP = VP_TO_CNP(diP);
  privVfsP = VP_TO_PVP(diP);
  DBGASSERT(privVfsP != NULL);

  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_MKDIR_ENTER,
         "gpfs_i_mkdir enter: diP 0x%lX mode 0x%X name '%s'\n",
         diP, mode, dentryP->d_name.name);
  /* BKL is held at entry */

  setCred(&eCred);
  rc = gpfs_ops.gpfsMkdir(privVfsP, dcnP, (void **)&newInodeP, &cnP, &iNum,
                          dentryP, (char *)dentryP->d_name.name, mode, umask,
                          &eCred);
  if (rc != 0)
  {
    /* Creation failed: unhash the dentry */
    d_drop(dentryP);
  }
  else
  {
    /* Sanity-check what gpfsMkdir handed back */
    DBGASSERT(cnP != NULL);
    DBGASSERT(iNum != -1);
    DBGASSERT(newInodeP != NULL);
    DBGASSERT(newInodeP->u.generic_ip == cnP);
    DBGASSERT(cnP->osNodeP == (void *)newInodeP);

#if LINUX_KERNEL_VERSION >= 2040900
    diP->i_sb->s_dirt = 1;
#endif
  }

  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_MKDIR_EXIT,
         "gpfs_i_mkdir exit: new inode 0x%lX iNum %d (0x%X) rc %d\n",
         newInodeP, iNum, iNum, rc);

  VFS_STAT_STOP;
  return -rc;
}

/*
 * gpfs_i_rmdir - remove a directory.
 *
 *   diP      inode of the parent directory
 *   dentryP  dentry of the directory being removed
 *
 * The dentry is first switched to the "d_delete pending" dentry operations
 * (the rationale is documented in gpfs_i_unlink), then gpfs_ops.gpfsRmdir
 * is called.  On failure the dentry is unhashed via d_drop(); on success
 * the superblock is flagged dirty (when LINUX_KERNEL_VERSION >= 2040900).
 * Returns the negated rc of gpfsRmdir.
 */
int
gpfs_i_rmdir(struct inode *diP, struct dentry *dentryP)
{
  struct inode *iP = dentryP->d_inode;
  struct gpfsVfsData_t *privVfsP;
  cxiNode_t *dcnP;
  cxiNode_t *cnP;
  ext_cred_t eCred;
  int rc;

  VFS_STAT_START(rmdirCall);
  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_RMDIR_ENTER,
         "gpfs_i_rmdir enter: diP 0x%lX iP 0x%lX name '%s'\n",
         diP, iP, dentryP->d_name.name);
  /* BKL is held at entry */

  cnP = VP_TO_CNP(iP);
  dcnP = VP_TO_CNP(diP);
  privVfsP = VP_TO_PVP(diP);
  DBGASSERT(privVfsP != NULL);

  /* see comment in gpfs_i_unlink */
  dentryP->d_op = &gpfs_dops_ddeletepending;

  setCred(&eCred);
  rc = gpfs_ops.gpfsRmdir(privVfsP, cnP, dcnP, (char *)dentryP->d_name.name,
                          &eCred);
  if (rc != 0)
  {
    /* Removal failed: unhash the dentry */
    d_drop(dentryP);
  }
  else
  {
#if LINUX_KERNEL_VERSION >= 2040900
    diP->i_sb->s_dirt = 1;
#endif
    /* d_delete will be called at VFS layer */
  }

  PRINTINODE(iP);
  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_RMDIR_EXIT,
         "gpfs_i_rmdir exit: diP 0x%lX iP 0x%lX rc %d\n", diP, iP, rc);

  VFS_STAT_STOP;
  return -rc;
}

/*
 * gpfs_i_mknod - create a special file.
 *
 *   diP      inode of the parent directory
 *   dentryP  dentry carrying the name of the new node
 *   mode     requested mode bits
 *   rdev     device number in kernel representation
 *
 * rdev is converted with cxiKernelDevToDev() and the request is passed to
 * gpfs_ops.gpfsMknod.  On failure the dentry is unhashed via d_drop().  On
 * success the superblock is flagged dirty (when LINUX_KERNEL_VERSION >=
 * 2040900) and init_special_inode() installs the operation vector on the
 * new inode.  Returns the negated rc of gpfsMknod.
 */
int
gpfs_i_mknod(struct inode *diP, struct dentry *dentryP, int mode, int rdev)
{
  struct gpfsVfsData_t *privVfsP;
  struct inode *newInodeP = NULL;
  cxiIno_t iNum = (cxiIno_t)-1;
  cxiMode_t umask = get_umask();
  cxiNode_t *dcnP;
  cxiNode_t *cnP;
  ext_cred_t eCred;
  int rc = 0;

  VFS_STAT_START(mknodCall);
  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_MKNOD_ENTER,
         "gpfs_i_mknod enter: diP 0x%lX mode 0x%X rdev 0x%X name '%s'\n",
         diP, mode, rdev, dentryP->d_name.name);
  /* BKL is held at entry */

  dcnP = VP_TO_CNP(diP);
  privVfsP = VP_TO_PVP(diP);
  DBGASSERT(privVfsP != NULL);

  setCred(&eCred);
  rc = gpfs_ops.gpfsMknod(privVfsP, dcnP, (void **)&newInodeP, &cnP,
                          &iNum, dentryP, (char *)dentryP->d_name.name,
                          mode, umask, cxiKernelDevToDev(rdev), &eCred);
  if (rc != 0)
  {
    /* Creation failed: unhash the dentry */
    d_drop(dentryP);
  }
  else
  {
    /* Sanity-check what gpfsMknod handed back */
    DBGASSERT(cnP != NULL);
    DBGASSERT(iNum != -1);
    DBGASSERT(newInodeP != NULL);
    DBGASSERT(newInodeP->u.generic_ip == cnP);
    DBGASSERT(cnP->osNodeP == (void *)newInodeP);

#if LINUX_KERNEL_VERSION >= 2040900
    diP->i_sb->s_dirt = 1;
#endif

    /* Set vector table for special files, gpfs will not get these
       operations. */
    init_special_inode(newInodeP, newInodeP->i_mode,
                       cxiDevToKernelDev(newInodeP->i_rdev));
  }

  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_MKNOD_EXIT,
         "gpfs_i_mknod exit: new inode 0x%lX iNum %d (0x%X) rc %d\n",
         newInodeP, iNum, iNum, rc);

  VFS_STAT_STOP;
  return -rc;
}

int
gpfs_i_rename(struct inode *diP, struct dentry *dentryP,
              struct inode *tdiP, struct dentry *tDentryP)
{
  int rc = 0;
  struct inode *iP = dentryP->d_inode;
  struct inode *tiP = tDentryP->d_inode;
  struct gpfsVfsData_t *privVfsP;
  cxiNode_t *sourceCNP, *sourceDirCNP, *targetCNP, *targetDirCNP;
  ext_cred_t eCred;
  
  VFS_STAT_START(renameCall);
  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_RENAME_1,
         "gpfs_i_rename enter: iP 0x%lX dvP 0x%lX name '%s'\n",
         iP, diP, dentryP->d_name.name);
  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_RENAME_2,
         "gpfs_i_rename: tiP 0x%lX tdiP 0x%lX new name '%s'\n",
         tiP, tdiP, tDentryP->d_name.name);
  /* BKL is held at entry */

  /* Do not allow simple rename across mount points */
  if (diP->i_sb != tdiP->i_sb)
  {
    rc = EXDEV;
    goto xerror;
  }

  sourceCNP = VP_TO_CNP(iP);
  sourceDirCNP = VP_TO_CNP(diP);

  targetCNP = (tiP != NULL) ? VP_TO_CNP(tiP) : NULL;
  targetDirCNP = VP_TO_CNP(tdiP);

  privVfsP = VP_TO_PVP(iP);
  DBGASSERT(privVfsP != NULL);

  setCred(&eCred);
  rc = gpfs_ops.gpfsRename(privVfsP, sourceCNP, sourceDirCNP,
                           (char *)dentryP->d_name.name, targetCNP,
                           targetDirCNP, (char *)tDentryP->d_name.name,
                           &eCred);
  if (rc == 0)
  {
    gpfs_i_getattr_internal(iP);
    gpfs_i_getattr_internal(diP);

    if (tiP)
      gpfs_i_getattr_internal(tiP);

    if (tdiP != diP)
      gpfs_i_getattr_internal(tdiP);

#if LINUX_KERNEL_VERSION >= 2040900
    diP->i_sb->s_dirt = 1;
#endif
  }

xerror:
  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_RENAME_EXIT,
         "gpfs_i_rename exit: iP 0x%lX rc %d\n", iP, rc);

  VFS_STAT_STOP(renameCall);
  return -rc;
}

/*
 * gpfs_i_readlink - read the contents of a symbolic link.
 *
 *   dentryP  dentry of the symbolic link
 *   bufP     destination buffer
 *   buflen   size of the destination buffer in bytes
 *
 * A single-element uio describing bufP/buflen is built and handed to
 * gpfs_ops.gpfsReadlink.  If the calling task holds the big kernel lock it
 * is dropped for the duration of the call and re-taken before returning.
 *
 * Returns the number of bytes placed in the buffer (buflen minus the
 * residual count), or the negated rc of gpfsReadlink on failure.
 */
int
gpfs_i_readlink(struct dentry *dentryP, char *bufP, int buflen)
{
  struct inode *iP = dentryP->d_inode;
  struct gpfsVfsData_t *privVfsP;
  struct cxiUio_t tmpUio;
  cxiIovec_t tmpIovec;
  cxiNode_t *cnP;
  Boolean gotBKL = false;
  int rc = 0;

  VFS_STAT_START(readlinkCall);
  TRACE5(TRACE_VNODE, 1, TRCID_LINUXOPS_READLINK_ENTER,
         "gpfs_i_readlink enter: dentryP 0x%lX bufP 0x%lX len %d "
           "iP 0x%lX name '%s'\n",
         dentryP, bufP, buflen, iP, dentryP->d_name.name);

  /* BKL is not held at entry, except for NFS calls */
  TraceBKL();
  if (current->lock_depth >= 0)
  {
    /* This task owns the kernel lock: release it for now */
    gotBKL = true;
    unlock_kernel();
  }

  cnP = VP_TO_CNP(iP);
  privVfsP = VP_TO_PVP(iP);
  DBGASSERT(privVfsP != NULL);

  /* The one and only iovec covers the whole caller buffer */
  tmpIovec.iov_base = bufP;
  tmpIovec.iov_len = buflen;

  /* uio: one iovec pending, none processed, read from offset 0 */
  tmpUio.uio_iov = &tmpIovec;
  tmpUio.uio_iovcnt = 1;
  tmpUio.uio_iovdcnt = 0;
  tmpUio.uio_offset = 0;
  tmpUio.uio_resid = buflen;
  tmpUio.uio_segflg = 0;
  tmpUio.uio_fmode = 0;

  rc = gpfs_ops.gpfsReadlink(privVfsP, cnP, &tmpUio);

  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_READLINK_EXIT,
        "gpfs_i_readlink exit: iP 0x%lX uio_resid %d offset %d rc %d\n",
         iP, tmpUio.uio_resid, tmpUio.uio_offset, rc);

  VFS_STAT_STOP;

  /* Re-take the kernel lock if it was held on entry */
  if (gotBKL)
    lock_kernel();

  if (rc == 0)
    return (buflen - tmpUio.uio_resid);

  return (-rc);
}

/*
 * gpfs_i_follow_link - resolve a symbolic link during path lookup.
 *
 *   dentry  dentry of the symbolic link
 *   nd      lookup state to be advanced to the link target
 *
 * The link text is read into a temporary name buffer with
 * gpfs_i_readlink() and handed to vfs_follow_link().  If the calling task
 * holds the big kernel lock it is dropped for the duration of the call and
 * re-taken before returning.
 *
 * Error handling: a failure to allocate the buffer or to read the link is
 * reported through vfs_follow_link(nd, ERR_PTR(err)).
 * NOTE(review): this relies on the 2.4 VFS convention that vfs_follow_link
 * releases nd and returns the encoded error when given an error pointer,
 * and that follow_link must have released nd when it fails -- confirm for
 * every supported kernel level.
 *
 * Returns 0 on success or a negative errno value.
 */
int
gpfs_i_follow_link(struct dentry *dentry, struct nameidata *nd)
{
  int rc;
  Boolean gotBKL = false;
  char *buf;

  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_FOLLOW_LINK_ENTER,
         "gpfs_i_follow_link enter: inode 0x%lX name '%s'\n",
         dentry->d_inode, dentry->d_name.name);

  /* BKL is not held at entry, except for NFS calls */
  TraceBKL();
  if (current->lock_depth >= 0)  /* kernel lock is held by me */
  {
    gotBKL = true;
    unlock_kernel();
  }

  /* Allocate a temporary buffer to hold the symlink contents */
  buf = __getname();
  if (buf == NULL)
  {
    /* Let vfs_follow_link dispose of nd and hand back the error */
    rc = vfs_follow_link(nd, ERR_PTR(-ENOMEM));
    goto xerror;
  }

  /* Read symlink contents; rc is the length read or a negative errno */
  rc = gpfs_i_readlink(dentry, buf, PATH_MAX);

  /* Always leave buf NUL-terminated so it is safe to trace and to parse.
     Previously a failed or zero-length read left buf uninitialized.
     Assumes, as the original code did, that the __getname() buffer can
     hold PATH_MAX bytes plus the terminator -- TODO confirm. */
  if (rc < 0)
    buf[0] = 0;
  else
    buf[(rc < PATH_MAX) ? rc : PATH_MAX] = 0;

  TRACE2(TRACE_VNODE, 2, TRCID_LINUXOPS_FOLLOW_LINK_1,
         "gpfs_i_follow_link readlink rc %d data '%s'\n", rc, buf);

  if (rc < 0)
  {
    /* Propagate the readlink failure instead of following garbage */
    rc = vfs_follow_link(nd, ERR_PTR(rc));
  }
  else
  {
    rc = vfs_follow_link(nd, buf);
  }
  putname(buf);

xerror:

  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_FOLLOW_LINK_2,
         "gpfs_i_follow_link exit: inode 0x%lX rc %d\n",
         dentry->d_inode, rc);

  if (gotBKL)        /* If held kernel lock on entry then reacquire it */
    lock_kernel();

  return rc;
}

/*
 * gpfs_i_readpage - read one page of a file.
 *
 *   fP     open file the page belongs to
 *   pageP  page to be filled
 *
 * An extra reference is taken on the page, the page is mapped with kmap()
 * and gpfsRWpage() is called with its last argument 0.  If gpfsRWpage
 * fails, the mapping and the extra reference are undone here; on success
 * they are left in place (presumably dropped elsewhere once the I/O is
 * complete -- TODO confirm).  Returns the negated rc of gpfsRWpage.
 */
int
gpfs_i_readpage(struct file *fP, struct page *pageP)
{
  struct dentry *dentryP = fP->f_dentry;
  struct inode *iP = fP->f_dentry->d_inode;
  struct gpfsVfsData_t *privVfsP;
  cxiNode_t *cnP;
  caddr_t address;
  int rc = 0;

  VFS_STAT_START(readpageCall);
  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_READPAGE_ENTER,
         "gpfs_i_readpage enter: dentryP 0x%lX page 0x%lX iP 0x%lX name '%s'\n",
         dentryP,  pageP, iP, dentryP->d_name.name);
  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_READPAGE_ENTER_A,
         "gpfs_i_readpage: page 0x%lX index %d count %d flags 0x%lX\n",
         pageP, pageP->index, page_count(pageP),pageP->flags);

  cnP = VP_TO_CNP(iP);
  privVfsP = VP_TO_PVP(iP);
  DBGASSERT(privVfsP != NULL);

  /* Take a reference on the page.  It arrives already locked, so no
     locking is needed here. */
  get_page(pageP);

  address = kmap(pageP);

  rc = gpfsRWpage(privVfsP, cnP, (struct MMFSVInfo *)fP->private_data,
                  address, (void *)pageP, 0);
  if (rc != 0)
  {
    /* Failed: undo the mapping and the reference taken above */
    kunmap(pageP);
    put_page(pageP);
  }

  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_READPAGE_EXIT,
         "gpfs_i_readpage exit: iP 0x%lX rc %d\n", iP, rc);

  VFS_STAT_STOP;
  return -rc;
}

/*
 * gpfs_i_writepage - write one page of a file.
 *
 *   fP     open file (only present when LINUX_KERNEL_VERSION < 2040000)
 *   pageP  page to be written
 *
 * The owning inode is found through pageP->mapping->host.  An extra
 * reference is taken on the page, the page is mapped with kmap() and
 * gpfsRWpage() is called with its last argument 1.  If gpfsRWpage fails,
 * the mapping and the extra reference are undone here.  When
 * LINUX_KERNEL_VERSION < 2040000 the routine then waits on the page and
 * locks it before returning.  Returns the negated rc of gpfsRWpage.
 */
int
#if LINUX_KERNEL_VERSION >= 2040000
gpfs_i_writepage(struct page *pageP)
#else
gpfs_i_writepage(struct file *fP, struct page *pageP)
#endif
{
  struct inode *iP = (struct inode *)pageP->mapping->host;
  struct gpfsVfsData_t *privVfsP;
  cxiNode_t *cnP;
  caddr_t address;
  int rc = 0;

  VFS_STAT_START(writepageCall);
  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_WRPAGE_ENTER,
         "gpfs_i_writepage enter: page 0x%lX iP 0x%lX inode_num %d\n",
          pageP, iP, iP->i_ino);
  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_WRPAGE_ENTER_A,
         "gpfs_i_writepage: page 0x%lX index %d count %d flag 0x%lX \n",
         pageP, pageP->index, page_count(pageP), pageP->flags);

  cnP = VP_TO_CNP(iP);
  privVfsP = VP_TO_PVP(iP);
  DBGASSERT(privVfsP != NULL);

  /* Take a reference on the page.  It arrives already locked, so no
     locking is needed here. */
  get_page(pageP);

  address = kmap(pageP);

  rc = gpfsRWpage(privVfsP, cnP, NULL, address, (void *)pageP, 1);
  if (rc != 0)
  {
    /* Failed: undo the mapping and the reference taken above */
    kunmap(pageP);
    put_page(pageP);
  }

#if LINUX_KERNEL_VERSION < 2040000
  wait_on_page(pageP);
  lock_page(pageP);
#endif

  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_WRPAGE_EXIT,
         "gpfs_i_writepage exit: iP 0x%lX rc %d\n", iP, rc);

  VFS_STAT_STOP;
  return -rc;
}

/*
 * gpfs_i_truncatepage - trace-only hook for a page being truncated.
 *
 *   pageP  the page in question
 *
 * Emits a trace record describing the page and the inode found through
 * pageP->mapping->host; nothing else is done.
 */
void gpfs_i_truncatepage(struct page *pageP)
{
  struct inode *iP;

  iP = (struct inode *)pageP->mapping->host;

  TRACE5(TRACE_VNODE, 1, TRCID_LINUXOPS_TRUNCATE_PAGE_ENTER,
         "gpfs_i_truncatepage: inode 0x%lX i_ino %d i_count %d page 0x%lX page_count %d\n",
         iP, iP->i_ino, atomic_read((atomic_t *)&iP->i_count),
         pageP, page_count(pageP));
}

/*
 * gpfs_i_bmap - block mapping is not provided.
 *
 * Neither argument is used.  The call is traced and -ENOSYS is returned
 * unconditionally.
 */
int
gpfs_i_bmap(struct inode *iP, int fragment)
{
  int rc = -ENOSYS;

  TRACE0(TRACE_VNODE, 1, TRCID_LINUXOPS_BMAP,
         "gpfs_i_bmap enter: rc ENOSYS\n");
  TraceBKL();

  return rc;
}

/*
 * gpfs_i_truncate - truncate hook; only traces the call.
 *
 *   iP  inode being truncated
 *
 * No work is required here: the file size has already been updated by
 * the notify_change call that preceded this one.
 */
void
gpfs_i_truncate(struct inode *iP)
{
  TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_TRUNCATE,
         "gpfs_i_truncate enter: inode 0x%lX\n", iP);

  TraceBKL();
}

/*
 * gpfs_i_permission - check access permission on an inode.
 *
 *   iP    inode being accessed
 *   mode  requested access mode; 0 means there is nothing to check
 *
 * When mode is non-zero the caller's credentials are captured with
 * setCred() and gpfs_ops.gpfsAccess is asked to check them with ACC_SELF.
 * When mode is zero no check is made and the result is success.
 *
 * Returns the negated rc of gpfsAccess (0 when access is granted or no
 * check was requested).
 */
int
gpfs_i_permission(struct inode *iP, int mode)
{
  cxiNode_t *cnP;
  struct gpfsVfsData_t *privVfsP;
  ext_cred_t eCred;
  int rc = 0;

  VFS_STAT_START(accessCall);

  /* BKL is held at entry */

  cnP = VP_TO_CNP(iP);

  TRACE6(TRACE_VNODE, 1, TRCID_LINUXOPS_ACCESS_ENTER,
         "gpfs_i_permission enter: iP 0x%lX mode 0x%X uid %d gid %d "
         "i_mode 0x%X i_xinfo 0x%X", iP, mode, current->fsuid, 
         current->fsgid, iP->i_mode, cnP->xinfo);

  privVfsP = VP_TO_PVP(iP);
  DBGASSERT(privVfsP != NULL);

  if (mode)        /* call permission check only if got access mode */
  {
    setCred(&eCred);
    rc = gpfs_ops.gpfsAccess(privVfsP, cnP, mode, ACC_SELF, &eCred);
  }

  /* The former "xerror:" label was never the target of a goto and has
     been removed. */
  TRACE4(TRACE_VNODE, 1, TRCID_LINUXOPS_ACCESS_EXIT,
         "gpfs_i_permission exit: iP 0x%lX std %d dir std %d rc %d",
         iP, iP->i_op == &gpfs_iops_stdperm, iP->i_op == &gpfs_dir_iops_stdperm,
         rc);

  VFS_STAT_STOP;
  return -rc;
}

/*
 * gpfs_i_smap - sector mapping is not provided.
 *
 * Neither argument is used.  The call is traced and -ENOSYS is returned
 * unconditionally.
 */
int
gpfs_i_smap(struct inode *iP, int sector)
{
  int rc = -ENOSYS;

  TRACE0(TRACE_VNODE, 1, TRCID_LINUXOPS_SMAP,
         "gpfs_i_smap enter: rc ENOSYS\n");
  TraceBKL();

  return rc;
}

/*
 * gpfs_i_updatepage - page update is not provided.
 *
 * None of the arguments is used.  The call is traced and -ENOSYS is
 * returned unconditionally.
 */
int
gpfs_i_updatepage(struct file *fP, struct page *pageP, const char *bufP,
                  unsigned long offset, uint count, int sync)
{
  int rc = -ENOSYS;

  TRACE0(TRACE_VNODE, 1, TRCID_LINUXOPS_UPDATEPAGE,
         "gpfs_i_updatepage enter: rc ENOSYS\n");
  TraceBKL();

  return rc;
}

/*
 * gpfs_i_revalidate - make sure the cached attributes of an inode are
 * current.
 *
 *   dentryP  dentry whose inode is to be revalidated
 *
 * Outcomes (the "code" value appears only in the exit trace):
 *   code 1  the dentry has no inode              -> ENOENT
 *   code 2  the inode has no cxiNode attached    -> ESTALE
 *   code 3  cached stat info is still valid      -> success, nothing to do
 *   code 0  gpfs_ops.gpfsGetattr was called to refresh the attributes
 *
 * Returns the negated rc.
 */
int
gpfs_i_revalidate(struct dentry *dentryP)
{
  struct inode *iP = dentryP->d_inode;
  struct gpfsVfsData_t *privVfsP;
  cxiNode_t *cnP;
  cxiVattr_t vattr;
  int code = 0;
  int rc;

  VFS_INC(revalidateCount);
  TRACE4(TRACE_VNODE, 2, TRCID_LINUXOPS_REVALIDATE_ENTER,
         "gpfs_i_revalidate enter: dentryP 0x%lX iP 0x%lX ino 0x%X name '%s'\n",
         dentryP, dentryP->d_inode, 
         (iP) ? iP->i_ino : -1,  dentryP->d_name.name);
  /* BKL is usually not held, but seems to be held when coming here as
     part of setting an ACL */

  if (iP == NULL)
  {
    rc = ENOENT;
    code = 1;
  }
  else
  {
    cnP = VP_TO_CNP(iP);

    if (cnP == NULL)
    {
      /* A bug in linux/fs/dcache.c (prune_dcache) can lead here: "count"
         entries are to be pruned, but when the last one turns out to be
         recently referenced, count is decremented without terminating
         the loop, so pruning continues past where it should (everything
         is pruned).  Without our patch for this, referencing the cxiNode
         would bring the kernel down.  Checking here (and in lookup) lets
         us reject the call instead. */
      TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_REVALIDATE_STALE,
             "gpfs_i_revalidate: cxiNode for iP 0x%lX (ino %d) was FREED!\n",
             iP, iP->i_ino);

      PRINTINODE(iP);
      rc = ESTALE;
      code = 2;
    }
    else if ((cnP->icValid & CXI_IC_STAT) == CXI_IC_STAT)
    {
      /* Cached stat information is valid; no refresh needed */
      rc = 0;
      code = 3;
    }
    else
    {
      privVfsP = VP_TO_PVP(iP);
      DBGASSERT(privVfsP != NULL);

      /* This has the effect of calling us back under a lock and 
       * setting the inode attributes at the OS level (since this 
       * operating system caches this info in the vfs layer)
       */
      rc = gpfs_ops.gpfsGetattr(privVfsP, cnP, &vattr, false);
      PRINTINODE(iP);

#if 0
  /* Delay briefly to give token revoke races a chance to happen, if there
     are any.  Time delay is in jiffies (10ms). */
#  define howLong 5
  TRACE1(TRACE_VNODE, 4, TRCID_REVAL_DELAY,
         "gpfs_i_revalidate: begin delay %d\n", howLong);
  current->state = TASK_INTERRUPTIBLE;
  schedule_timeout(howLong);
  TRACE1(TRACE_VNODE, 14, TRCID_REVAL_DELAY_END,
         "gpfs_i_revalidate: end delay %d\n", howLong);
#endif
    }
  }

  TRACE3(TRACE_VNODE, 2, TRCID_LINUXOPS_REVALIDATE_EXIT,
         "gpfs_i_revalidate exit: dentry 0x%lX code %d rc %d\n",
         dentryP, code, rc);

  return -rc;
}

/*
 * gpfs_i_setattr - change attributes of the inode behind a dentry.
 *
 *   dentryP  dentry whose inode is to be changed
 *   iattrP   attribute changes requested
 *
 * Statistics-gathering wrapper around gpfs_i_setattr_internal(); returns
 * that routine's result negated.
 */
int
gpfs_i_setattr(struct dentry *dentryP, struct iattr *iattrP)
{
  int rc;

  VFS_STAT_START(setattrCall);

  rc = gpfs_i_setattr_internal(dentryP->d_inode, iattrP);

  VFS_STAT_STOP;
  return -rc;
}

/*
 * gpfs_i_setattr_internal - apply the attribute changes in *aP to iP.
 *
 *   iP  inode to change
 *   aP  requested changes; aP->ia_valid says which fields are meaningful
 *
 * The changes are applied with up to four gpfs_ops.gpfsSetattr calls, in
 * this order: size (V_SIZE), mode (V_MODE), owner/group (V_OWN) and times
 * (V_STIME).  The first failing call ends processing.  If everything
 * succeeded, gpfsSyncNFS is invoked when cxiAllowNFSFsync() says so, and
 * the superblock is flagged dirty (when LINUX_KERNEL_VERSION >= 2040900).
 *
 * Returns the rc as is (not negated); callers negate it themselves.
 * The "code" value identifies the failing step in the exit trace only.
 */
int
gpfs_i_setattr_internal(struct inode *iP, struct iattr *aP)
{
  struct gpfsVfsData_t *privVfsP;
  cxiNode_t *cnP;
  cxiTimeStruc_t atime, mtime, ctime;
  ext_cred_t eCred;
  unsigned int ia_valid;
  long arg1;      /* each must be wide enough on 64bit to carry */
  long arg2;      /*   either a pointer or an integer           */
  long arg3;
  int code = 0;
  int rc = 0;

  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_SETATTR_ENTER,
         "gpfs_i_setattr enter: iP 0x%lX ia_valid 0x%X ia_attr_flags 0x%X\n",
         iP, aP->ia_valid, aP->ia_attr_flags);
  /* ?? Callers of this are inconsistent about whether the BKL is held */

  cnP = VP_TO_CNP(iP);
  privVfsP = VP_TO_PVP(iP);
  DBGASSERT(privVfsP != NULL);

  /* Work on a private copy of the flags; bits may be cleared below */
  ia_valid = aP->ia_valid;
  setCred(&eCred);

  /* Step 1: file size.  gpfsSetattr is skipped only when the cached
     attributes are valid and already show the requested size. */
  if ((ia_valid & ATTR_SIZE) &&
      (!(cnP->icValid & CXI_IC_ATTR) ||
       ((struct inode *)cnP->osNodeP)->i_size != aP->ia_size))
  {
    arg1 = (long)&aP->ia_size;
    arg2 = 0;
    arg3 = 0;

    rc = gpfs_ops.gpfsSetattr(privVfsP, cnP, V_SIZE, arg1, arg2, arg3,
                              &eCred);
    if (rc != 0)
    {
      code = 1;
      goto xerror;
    }

    /* gpfsSetattr(... V_SIZE ...) will have updated ctime and mtime.
       No need to do this again. */
    ia_valid &= ~(ATTR_MTIME | ATTR_CTIME);
  }

  /* Step 2: file mode */
  if (ia_valid & ATTR_MODE)
  {
    arg1 = (long)aP->ia_mode;
    arg2 = 0;
    arg3 = 0;

    rc = gpfs_ops.gpfsSetattr(privVfsP, cnP, V_MODE, arg1, arg2, arg3, &eCred);
    if (rc != 0)
    {
      code = 2;
      goto xerror;
    }
  }

  /* Step 3: owner and/or group.  arg1 carries "leave as is" flags for
     whichever of the two is not being changed. */
  if (ia_valid & (ATTR_UID | ATTR_GID))
  {
    arg1 = 0;
    arg2 = 0;
    arg3 = 0;

    if (!(ia_valid & ATTR_UID))
      arg1 |= T_OWNER_AS_IS;
    else
      arg2 = (long)aP->ia_uid;

    if (!(ia_valid & ATTR_GID))
      arg1 |= T_GROUP_AS_IS;
    else
      arg3 = (long)aP->ia_gid;

    rc = gpfs_ops.gpfsSetattr(privVfsP, cnP, V_OWN, arg1, arg2, arg3, &eCred);
    if (rc != 0)
    {
      code = 3;
      goto xerror;
    }
  }

  /* Step 4: access, modification and change times.  An argument left at
     0 means that particular time was not requested. */
  if (ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME))
  {
    arg1 = 0;
    arg2 = 0;
    arg3 = 0;

    if (ia_valid & ATTR_ATIME)
    {
      atime.tv_sec = aP->ia_atime;
      atime.tv_nsec = 0;
      arg1 = (long)&atime;
    }

    if (ia_valid & ATTR_MTIME)
    {
      mtime.tv_sec = aP->ia_mtime;
      mtime.tv_nsec = 0;
      arg2 = (long)&mtime;
    }

    if (ia_valid & ATTR_CTIME)
    {
      ctime.tv_sec = aP->ia_ctime;
      ctime.tv_nsec = 0;
      arg3 = (long)&ctime;
    }

    rc = gpfs_ops.gpfsSetattr(privVfsP, cnP, V_STIME, arg1, arg2, arg3, &eCred);
    if (rc != 0)
    {
      code = 4;
      goto xerror;
    }
  }

xerror:

  if (rc == 0)
  {
    /* For NFS the inode might have to be written; gpfsSyncNFS() itself
     * decides whether that is necessary.
     */
    if (cxiAllowNFSFsync())
      rc = gpfs_ops.gpfsSyncNFS(privVfsP, cnP, 0, &eCred);

#if LINUX_KERNEL_VERSION >= 2040900
    iP->i_sb->s_dirt = 1;
#endif
  }

  TRACE3(TRACE_VNODE, 1, TRCID_LINUXOPS_SETATTR_EXIT,
         "gpfs_i_setattr exit: iP 0x%lX code %d rc %d\n", iP, code, rc);

  return rc;
}

/*
 * gpfs_i_getattr - fetch the attributes of the inode behind a dentry.
 *
 *   dentryP  dentry whose inode is queried
 *   iattrP   filled in by getIattr() when the refresh succeeds
 *
 * Refreshes the inode through gpfs_i_getattr_internal().  Returns 0 on
 * success, otherwise the negated rc of that routine.
 */
int
gpfs_i_getattr(struct dentry *dentryP, struct iattr *iattrP)
{
  int rc;

  VFS_STAT_START(getattrCall);
  rc = gpfs_i_getattr_internal(dentryP->d_inode);
  /* Linux doc says this is unused; is this true? */

  if (rc != 0)
  {
    rc = -rc;
  }
  else
  {
    getIattr(dentryP->d_inode, iattrP);
  }

  VFS_STAT_STOP;
  return rc;
}

/*
 * gpfs_i_getattr_internal - refresh the OS-level attributes of an inode.
 *
 *   iP  inode to refresh
 *
 * Calls gpfs_ops.gpfsGetattr for the cxiNode attached to iP.  Returns
 * the rc of gpfsGetattr as is (not negated).
 */
int
gpfs_i_getattr_internal(struct inode *iP)
{
  struct gpfsVfsData_t *privVfsP;
  cxiNode_t *cnP;
  cxiVattr_t vattr;
  int rc;

  TRACE1(TRACE_VNODE, 1, TRCID_LINUXOPS_GETATTR_ENTER,
         "gpfs_i_getattr enter: iP 0x%lX\n", iP);
  /* BKL is held at entry */

  privVfsP = VP_TO_PVP(iP);
  DBGASSERT(privVfsP != NULL);
  cnP = VP_TO_CNP(iP);

  /* The call below results in a callback, made under a lock, that sets
   * the inode attributes at the OS level (this operating system caches
   * them in its vfs layer).
   */
  rc = gpfs_ops.gpfsGetattr(privVfsP, cnP, &vattr, false);
  PRINTINODE(iP);

  TRACE2(TRACE_VNODE, 1, TRCID_LINUXOPS_GETATTR_EXIT,
         "gpfs_i_getattr exit: iP 0x%lX rc %d\n", iP, rc);

  return rc;
}
